# Converts changed tutorial notebooks to Python scripts and executes them.
name: Run Tutorials

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  # Runs for pull requests that touch a notebook directly under `tutorials/`.
  pull_request:
    paths:
      - "tutorials/*.ipynb"

jobs:
  run-tutorials:
    runs-on: ubuntu-latest
    # All steps of this job execute inside this container image.
    container: deepset/haystack:base-cpu-main

    # Environment variables shared by every step of the job.
    env:
      HAYSTACK_TELEMETRY_ENABLED: 'False'
      # Matches the label of the service container declared under `services`.
      ELASTICSEARCH_HOST: 'elasticsearch'

    services:
      elasticsearch:
        image: elasticsearch:7.9.2
        env:
          ES_JAVA_OPTS: '-Xms128m -Xmx256m'
          discovery.type: 'single-node'

    steps:
      # Installs the system packages first, then the Python packages used to
      # convert and run the notebooks.
      - name: Install dependencies
        run: |
          apt-get update && apt-get install -y git build-essential gcc libsndfile1 ffmpeg && rm -rf /var/lib/apt/lists/*
          pip install nbconvert ipython
          pip install "pyworld<=0.2.12" espnet espnet-model-zoo pydub
          # Quoted so the shell cannot interpret `[pdf]` as a glob character class.
          pip install "farm-haystack[pdf]"

      # Adds the text2speech package on top of the base dependencies.
      - name: Install Haystack Extras text2speech dependencies
        run: pip install farm-haystack-text2speech
      
      # The version specifier is quoted so the shell does not treat `>` as a redirect.
      - name: Install Hugging Face datasets
        run: pip install "datasets>=2.6.1"

      # Check out the repository so the notebooks are available on disk.
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # Fetches the previous commit in addition to HEAD; presumably so the
          # "Get changed files" step has a base to diff against — TODO confirm.
          fetch-depth: 2

      # See https://github.com/actions/runner-images/issues/6775
      # NOTE(review): the linked issue appears to concern git refusing to operate
      # on a workspace owned by a different user than the one running inside the
      # container; handing ownership to root is the workaround — confirm against
      # the issue before removing this step.
      - name: Change Owner of Container Working Directory
        run: chown root:root .

      # Collects the files changed by the triggering event. The result is read
      # later in this job through `steps.files.outputs.all_changed_files`.
      # NOTE(review): this third-party action is referenced by a mutable tag;
      # consider pinning it to a full commit SHA.
      - name: Get changed files
        id: files
        uses: tj-actions/changed-files@v41

      # Converts every changed notebook that is not on the skiplist into a Python
      # script placed next to the notebook.
      - name: Convert notebooks to Python
        shell: bash
        env:
          # The file list is handed over through the environment rather than
          # being expanded with `${{ ... }}` inside the script. File names come
          # from the pull request, so inlining them would let a crafted name
          # inject shell code into this step.
          ALL_CHANGED_FILES: ${{ steps.files.outputs.all_changed_files }}
        run: |
          # Notebooks that are not converted (and therefore never executed) here.
          skiplist=(
              "tutorials/02_Finetune_a_model_on_your_data.ipynb"
              "tutorials/07_RAG_Generator.ipynb"
              "tutorials/09_DPR_training.ipynb"
              "tutorials/10_Knowledge_Graph.ipynb"
              "tutorials/12_LFQA.ipynb"
              "tutorials/13_Question_generation.ipynb"
              "tutorials/14_Query_Classifier.ipynb"
              "tutorials/18_GPL.ipynb"
              "tutorials/22_Pipeline_with_PromptNode.ipynb"
              "tutorials/23_Answering_Multihop_Questions_with_Agents.ipynb"
              "tutorials/24_Building_Chat_App.ipynb"
              "tutorials/25_Customizing_Agent.ipynb"
              "tutorials/27_First_RAG_Pipeline.ipynb"
              "tutorials/28_Structured_Output_With_Loop.ipynb"
              "tutorials/29_Serializing_Pipelines.ipynb"
              "tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb"
              "tutorials/31_Metadata_Filtering.ipynb"
              "tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb"
              "tutorials/33_Hybrid_Retrieval.ipynb"
              "tutorials/34_Extractive_QA_Pipeline.ipynb"
              "tutorials/35_Model_Based_Evaluation_of_RAG_Pipelines.ipynb"
              "tutorials/36_Building_Fallbacks_with_Conditional_Routing.ipynb"
              "tutorials/37_Simplifying_Pipeline_Inputs_with_Multiplexer.ipynb"
              "tutorials/39_Embedding_Metadata_for_Improved_Retrieval.ipynb"
              "tutorials/40_Building_Chat_Application_with_Function_Calling.ipynb"
          )
          # The action emits one space-separated line; `read -a` splits it into
          # words without glob-expanding the names.
          read -r -a changed_files <<< "${ALL_CHANGED_FILES}"
          for changed_file in "${changed_files[@]}"; do
            if [[ $changed_file == *".ipynb" ]]; then
              # Space-padded match so only whole skiplist entries count.
              if [[ ! " ${skiplist[*]} " =~ " ${changed_file} " ]]; then
                jupyter nbconvert --to python --RegexRemovePreprocessor.patterns '%%bash' "${changed_file}"
              fi
            fi
          done

      - name: Run the converted notebooks
        # `find ... {} +` batches several files into one command line, and
        # `python a.py b.py` would run only `a.py` (the rest become its argv).
        # The batch is therefore handed to a small shell loop that runs each
        # script on its own and remembers whether any of them failed.
        # Note: the `+` at the end of the `find` invocation is kept on purpose: it
        # makes `find` fail if any of the execs failed, otherwise `find` returns 0
        # even when the execs fail. `-execdir` runs each batch from the directory
        # that contains the scripts.
        run: |
          find ./tutorials -name "*.py" -execdir sh -c '
            status=0
            for script in "$@"; do
              echo "Running ${script}"
              python "${script}" || status=1
            done
            exit "${status}"
          ' sh {} +
